Read in the data

#normal data
ed <- fread(here::here("data/education/combined_education.csv")) %>% 
  mutate(year = factor(year))
ed$year <- revalue(ed$year,c("1415" = "2014-2015", "1516" = "2015-2016", "1617" = "2016-2017",
          "1718" = "2017-2018", "1819" = "2018-2019"))
ed <- ed %>% filter(District.Name == "South Wasco County SD 1" |
           District.Name == "Jefferson County SD 509J" |
           District.Name == "North Wasco County SD 21" |
           District.Name == "Sherman County SD" |
           District.Name == "Dufur SD 29" |
           District.Name == "Hood River County SD") %>% 
  select(-c("District.ID" ,"Student.Group","Fall.Membership", "Free.Reduced.Priced.Lunch","Percent.Regular.Attenders" )) 

ed.increase <- ed %>% select("year" ,"District.Name","On.Time.Grad.Rate", "Teacher.Experience.Pct","Percent.ELA.Proficient.Change")
ed.decrease <- ed %>% select("year" ,"District.Name","Percent.Economically.Disadvantaged", "Percent.Chronically.Absent","Dropout.Rate")

#scaling the data within each measure to standardize the values.
#con: suppresses the minor differences in variables that have a smaller range
ed.scaled <- ed %>% mutate(Percent.ELA.Proficient.Change = scale(Percent.ELA.Proficient.Change),
                                  Percent.Chronically.Absent = scale(Percent.Chronically.Absent),
                                  Percent.Economically.Disadvantaged = scale(Percent.Economically.Disadvantaged),
                                  Teacher.Experience.Pct = scale(Teacher.Experience.Pct),
                                  Dropout.Rate = scale(Dropout.Rate),
                                  On.Time.Grad.Rate = scale(On.Time.Grad.Rate))
ed.scaled <- ed.scaled %>% filter(District.Name == "South Wasco County SD 1" |
                      District.Name == "Jefferson County SD 509J" |
                      District.Name == "North Wasco County SD 21" |
                      District.Name == "Sherman County SD" |
                      District.Name == "Dufur SD 29" |
                      District.Name == "Hood River County SD")

prepare table to make a heatmap

## for all the education data with original values
ed.melt = melt(ed, id.vars = c("year", "District.Name"),
             measure.vars = c("On.Time.Grad.Rate", "Dropout.Rate" , 
                              "Percent.ELA.Proficient.Change", "Teacher.Experience.Pct", "Percent.Chronically.Absent",
                              "Percent.Economically.Disadvantaged"))

## for the education data with scaled values
ed.melt.scaled = melt(ed.scaled, id.vars = c("year", "District.Name"),
               measure.vars = c("On.Time.Grad.Rate", "Dropout.Rate" , 
                                "Percent.ELA.Proficient.Change", "Teacher.Experience.Pct", "Percent.Chronically.Absent",
                                "Percent.Economically.Disadvantaged")) 

### increaseing variables
ed.melt.increase = melt(ed.increase, id.vars = c("year", "District.Name"),
               measure.vars = c("On.Time.Grad.Rate", "Teacher.Experience.Pct",
                                "Percent.ELA.Proficient.Change")) %>%
  mutate(variable = factor(variable, levels = c("On.Time.Grad.Rate", "Teacher.Experience.Pct",
                                "Percent.ELA.Proficient.Change"))) %>%
        mutate(variable = recode(variable, "On.Time.Grad.Rate" = "On Time Graduation",
                            "Teacher.Experience.Pct" = "Teacher Experience",
                            "Percent.ELA.Proficient.Change" = "ELA Proficiency Change"))

ed.melt.decrease = melt(ed.decrease, id.vars = c("year", "District.Name"),
               measure.vars = c("Percent.Economically.Disadvantaged", "Percent.Chronically.Absent",
                                "Dropout.Rate")) %>%
  mutate(variable = recode(variable, "Percent.Economically.Disadvantaged" = "Economically Disadvantaged",
                           "Percent.Chronically.Absent" = "Chronic Absenteeism",
                           "Dropout.Rate"="Dropout Rate"))

Heat Maps

Original Scale

for south wasco alone:

sw <- filter(ed.melt, District.Name == "South Wasco County SD 1")
ggplot(sw, aes(y = variable, x = factor(year), fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  scale_fill_viridis() +
  coord_equal()

Facet wrap to show all schools

ggplot(ed.melt, aes(y = variable, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + facet_wrap(~District.Name) +
  scale_fill_viridis(option = "D") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  xlab("School Year")

Trying with rwb divergent palette: not working - Continuous value supplied to discrete scale.

continuous scale palettes: https://biostats.w.uib.no/color-scale-for-continuous-variables/ controlling midpoint and range for color palettes: https://stackoverflow.com/questions/58718527/setting-midpoint-for-continuous-diverging-color-scale-on-a-heatmap

# interval <- c(-60, -20, -10, -5, seq(0,100,5))
# data.values <- as.vector(na.omit(ed.melt$value))
# color_rwb <- cut(data.values, breaks=interval, labels = rwb(24))
pgcol <- brewer.pal(9, "PRGn")
pgpal <- colorRampPalette(pgcol)
ggplot(ed.melt, aes(y = variable, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + facet_wrap(~District.Name) +
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_divergingx(palette = "PRGn", mid = 0)
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  xlab("School Year")

Facet Heat maps Benefits to Student Success

ggplot(ed.melt.increase, aes(y = variable, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + facet_wrap(~District.Name) +
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(title = "Benefits to Student Success", x ="School Year", y = "", fill="Percent") 
## Warning: Removed 12 rows containing missing values (geom_text).

Facet Heat Maps: Barriers to Student Success

ggplotly(ggplot(ed.melt.decrease, aes(y = variable, x = year, fill = value,
                              text = paste0("School District: ", District.Name,
                                "<br>Year: ", year,
                                "<br>Percent ", variable, ": ", value, "%"))) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + facet_wrap(~District.Name) +
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_sequential(palette = "Purples 3") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(title = "Barriers to Student Success", 
       x ="School Year", y = "", fill="Percent"), 
  tooltip = "text")%>% 
  config(displayModeBar = "static", displaylogo = FALSE, 
         modeBarButtonsToRemove=list("zoom2d","select2d","lasso2d","hoverClosestCartesian",
                                     "hoverCompareCartesian","resetScale2d")) 
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

stacked heatmaps for Benefits to student success

ed.melt.increase2 <- ed.melt.increase %>% mutate(indicator = paste(variable, District.Name, sep= "-"))
ggplot(ed.melt.increase2, aes(y = indicator, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + 
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  
   ## banners
  geom_hline(yintercept = 6.5, color = "white", lwd = 5) + 
  geom_hline(yintercept = 12.5, color = "white", lwd = 5) +
  geom_hline(yintercept = 18.5, color = "white", lwd = 5) +

  # Title of variables
  annotate("text", x=2, y=6.5,label="ELA Proficiency Change", fontface=2, size = 3, color="black") +
  annotate("text", x=2, y=12.5,label="On Time Graduation", fontface=2, size = 3, color="black") +
  annotate("text", x=2, y=18.5,label="Teacher Experience", fontface=2, size = 3, color="black") +
  
  #scale_y_discrete(limit = rep(c("South Wasco County SD 1", "Sherman County SD" , "North Wasco County SD 21",
                             #"Jefferson County SD 509J", "Hood River County SD","Dufur SD 29"), 3)) +
  xlab("School Year")
## Warning: Removed 12 rows containing missing values (geom_text).

Try making heatmaps with cowplot each variable is its own heat map with all the schools.

ed.grad <- filter(ed.melt.increase, variable == "On Time Graduation")
ed.teach <- filter(ed.melt.increase, variable == "Teacher Experience")
ed.ela <- filter(ed.melt.increase, variable == "ELA Proficiency Change")

grad <- ggplot(ed.grad, aes(y = District.Name, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + 
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank())

teach <- ggplot(ed.teach, aes(y = District.Name, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + 
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank())

ela <- ggplot(ed.ela, aes(y = District.Name, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + 
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

plot_grid(
  grad, teach, ela,
  labels = c("On Time Graduation", "Teacher Experience", "ELA Proficiency Change"), ncol = 1
)
## Warning: Removed 12 rows containing missing values (geom_text).

try facet by question....

ggplot(ed.melt.increase, aes(y = District.Name, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,0)), color = "black", size = 3.5) +
  coord_equal() + 
  #facet_grid(rows = vars(variable)) +
  facet_wrap(~variable, ncol=1) +
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_divergingx(palette = "PRGn", mid = 0, breaks= c(-50, -25, 0, 25, 50, 75, 100), limits = c(-70, 100)) +
  theme(axis.text.x = element_text(angle = 35, vjust = 1, hjust=1), 
        strip.background = element_rect(
     color="black", fill="#ADB5BD", size=1, linetype="solid"),
      strip.text.y = element_text(size = 5, color = "black", face = "bold")) +
  labs(title = "Benefits to Student Success", x ="School Year", y = "", fill="Percent <br> Range: -60% to 100%")
## Warning: Removed 12 rows containing missing values (geom_text).

plotly version

ggplotly(ggplot(ed.melt.increase, aes(y = District.Name, x = year, fill = value,
                  text = paste0("School District: ", District.Name,
                                "<br>Year: ", year,
                                "<br>Percent ", variable, ": ", value, "%"))) +
  geom_tile(color = "#ADB5BD") + #gray
  #geom_text(aes(label = round(value,0)), color = "black") +
  coord_equal() + 
  #facet_grid(rows = vars(variable)) +
  facet_wrap(~variable, ncol=1) +
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
  theme(axis.text.x = element_text(angle = 20, vjust = 0.5, hjust=1)) +
  labs(title = "Benefits to Student Success", x ="School Year", y = "", fill="Percent"), tooltip="text")

Stacked Barriers to student success

ed.melt.decrease2 <- ed.melt.decrease %>% mutate(indicator = paste(variable, District.Name, sep= "-"))
ggplot(ed.melt.decrease2, aes(y = indicator, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + 
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_sequential(palette = "Purples 3") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  ## banners
  geom_hline(yintercept = 6.5, color = "white", lwd = 5) + 
  geom_hline(yintercept = 12.5, color = "white", lwd = 5) +
  geom_hline(yintercept = 18.5, color = "white", lwd = 5) +

  # Title of variables
  annotate("text", x=2, y=6.5,label="Chronic Absenteeism", fontface=2, size = 3, color="black") +
  annotate("text", x=2, y=12.5,label="Dropout", fontface=2, size = 3, color="black") +
  annotate("text", x=2, y=18.5,label="Economically Disadvantaged", fontface=2, size = 3, color="black") +
  
  xlab("School Year")

try facet by question

ggplot(ed.melt.decrease, aes(y = District.Name, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,0)), color = "black", size = 3.5) +
  coord_equal() + 
  #facet_grid(rows = vars(variable)) +
  facet_wrap(~variable, ncol=1) +
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_sequential(palette = "Purples 3") +
  theme(axis.text.x = element_text(angle = 35, vjust = 1, hjust=1), 
        strip.background = element_rect(
     color="black", fill="#ADB5BD", size=1, linetype="solid"),
      strip.text.y = element_text(size = 5, color = "black", face = "bold")) +
  labs(title = "Barriers to Student Success", x ="School Year", y = "", fill="Percent")